function [dependent_variable,regressor_mat,time_indices_without_only_NaN_obs,country_indices_without_only_NaN_obs]=create_regression_matrices_no_NaN(dependent_variable,regressor_mat,data_array_for_regression_stacked_by_variable,pos,country_indicator_names_mapping,time_fixed_effects_dummy,country_fixed_effects_dummy,drop_warning_dummy,indicator_position,hard_coded_drop_countries)
% function [dependent_variable,regressor_mat,time_indices_without_only_NaN_obs,country_indices_without_only_NaN_obs]=create_regression_matrices_no_NaN(dependent_variable,regressor_mat,data_array_for_regression_stacked_by_variable,pos,country_indicator_names_mapping,time_fixed_effects_dummy,country_fixed_effects_dummy,drop_warning_dummy,indicator_position,hard_coded_drop_countries)
% Create the regression matrices by dropping all time points for which no
% country has a complete (NaN-free) set of variables, and all countries that
% have no complete observation in the remaining time points. Afterwards the
% requested fixed effects (or a constant, if none are requested) are appended
% to the regressor matrix.
%
% Inputs:
%   - dependent_variable                                [T by n_countries] dependent variable matrix
%   - regressor_mat                                     [T by n_countries by nvars] regressor matrix before removing NaNs and adding fixed effects
%   - data_array_for_regression_stacked_by_variable     [T by n_countries by nvars] data matrix including fixed effects
%   - pos                                               [structure] containing variable position; the fields country_ident,
%                                                           Country_FE_<i> and Time_FE_<t> are read when needed
%   - country_indicator_names_mapping                   [cell]      country names, indexed by the value stored in the
%                                                           pos.country_ident layer of the data array (only used for the drop message)
%   - time_fixed_effects_dummy                          [boolean]   indicator whether time fixed effects are requested
%   - country_fixed_effects_dummy                       [boolean]   indicator whether country fixed effects are requested
%   - drop_warning_dummy                                [boolean]   (optional, default: 1) print a message naming the dropped countries
%   - indicator_position                                [scalar]    (optional, default: []) layer of the data array holding the
%                                                           state indicator used to split the country fixed effects;
%                                                           NOTE(review): the code removes exactly one appended layer, so a scalar is assumed
%   - hard_coded_drop_countries                         [vector]    (optional, default: [16,29,31,37]) column indices of countries whose
%                                                           dependent variable is set to NaN up front; pass [] to disable.
%                                                           Indices exceeding n_countries are ignored.
%
% Outputs:
%   - dependent_variable                                [T_non_NaN by n_countries_non_NaN] dependent variable matrix after removing NaNs
%   - regressor_mat                                     [T_non_NaN by n_countries_non_NaN by nvars] regressor matrix after removing NaNs and adding fixed effects
%   - time_indices_without_only_NaN_obs                 [T_non_NaN by 1] indices of the retained time points
%   - country_indices_without_only_NaN_obs              [n_countries_non_NaN by 1] indices of the retained countries

% Copyright (C) 2019-2023 Benjamin Born, Francesco D'Ascanio, Gernot J. Mueller, Johannes Pfeifer
%
% This is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% It is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
% 
% For a copy of the GNU General Public License,
% see <http://www.gnu.org/licenses/>.

if nargin<8
    drop_warning_dummy = 1;
end
if nargin<9
    indicator_position = [];
end
if nargin<10
    % historical default: this set of countries used to be hard-coded here
    hard_coded_drop_countries = [16,29,31,37];
end
time_dim=1;
country_dim=2;
var_dim=3;

% Only touch columns that exist: assigning beyond the last column would
% silently enlarge dependent_variable and break the concatenation below.
hard_coded_drop_countries=hard_coded_drop_countries(hard_coded_drop_countries<=size(dependent_variable,country_dim));
if ~isempty(hard_coded_drop_countries)
    warning('I am dropping countries due to hard-coding a split.')
    dependent_variable(:,hard_coded_drop_countries)=NaN;
end

%% Stack matrices
% layer order: regressors, dependent variable, (optionally) state indicator
has_indicator=~isempty(indicator_position);
if has_indicator
    stacked_matrix=cat(3,regressor_mat,dependent_variable,data_array_for_regression_stacked_by_variable(:,:,indicator_position));
else
    stacked_matrix=cat(3,regressor_mat,dependent_variable);
end

% Every unsuccessful pass removes at least one retained country, so the
% number of countries plus one bounds the number of passes needed.
max_iterations=size(stacked_matrix,country_dim)+1;
problem_indicator=true;
iter=1;
while problem_indicator && iter<=max_iterations
    %find country-time observations with only incomplete set of variables
    country_time_NaN_obs=any(isnan(stacked_matrix),var_dim);
    %find timepoints without country with full set of variables
    timepoints_without_full_set=all(country_time_NaN_obs,country_dim);
    time_indices_without_only_NaN_obs=find(~timepoints_without_full_set);
    
    %select unproblematic time-points
    stacked_matrix_without_NaN_timepoints=stacked_matrix(~timepoints_without_full_set,:,:);
    
    %% get countries to be dropped; NB: cannot affect time points, because only countries without full set of regressors are dropped
    country_time_NaN_obs=any(isnan(stacked_matrix_without_NaN_timepoints),var_dim);
    country_without_full_set=all(country_time_NaN_obs,time_dim);
    country_indices_without_only_NaN_obs=find(~country_without_full_set');

    % make sure that state-dependent FE work; does not work if there is no
    % variation in indicator; here we test only for the dummy case
    if has_indicator
        indicator_in_sample=data_array_for_regression_stacked_by_variable(time_indices_without_only_NaN_obs,country_indices_without_only_NaN_obs,indicator_position);
        % explicit dimension: with a single retained time point the default
        % all() would collapse across countries instead of across time
        split_infeasible_countries=find(all(indicator_in_sample==1,time_dim) | all(indicator_in_sample==0,time_dim));
        if isempty(split_infeasible_countries)
            problem_indicator=false;
        else
            % split_infeasible_countries indexes into the retained subset;
            % translate to columns of the full panel before blanking them
            stacked_matrix(:,country_indices_without_only_NaN_obs(split_infeasible_countries),end)=NaN;
        end
    else
        problem_indicator=false;
    end   
    iter=iter+1;
end
if problem_indicator
    % safety net; should be unreachable given the bound on max_iterations
    error('create_regression_matrices_no_NaN:split_not_resolved','Could not find a sample in which the indicator varies for every retained country.')
end
if has_indicator
    % delete added indicator so that the last layer is the dependent variable again
    stacked_matrix_without_NaN_timepoints(:,:,end)=[];
end

if any(country_without_full_set) && drop_warning_dummy
    fprintf('\n')
    % fprintf recycles the format string, i.e. one line is printed per dropped country
    fprintf('I am dropping %s because there are only NaN observations in the sample\n',country_indicator_names_mapping{data_array_for_regression_stacked_by_variable(1,find(country_without_full_set),pos.country_ident)})
    fprintf('\n')
end

stacked_matrix_final=stacked_matrix_without_NaN_timepoints(:,country_indices_without_only_NaN_obs,:);


regressor_mat=stacked_matrix_final(:,:,1:end-1);
dependent_variable=stacked_matrix_final(:,:,end);

% Legacy code (disabled): trimming to the smallest common contiguous sample
% % find NaN for a particular timepoint for all countries in a particular
% NaN_variable_for_all_countries_index=all(isnan(regressor_mat),2);
% non_NaN_time_for_any_variable_in_all_timepoints_index=find(~any(NaN_variable_for_all_countries_index,3));
%
% first_non_NaN_obs_regressors=find(~any(NaN_variable_for_all_countries_index,3),1,'first');
% last_non_NaN_obs_regressors=find(~any(NaN_variable_for_all_countries_index,3),1,'last');
% if length(non_NaN_time_for_any_variable_in_all_timepoints_index)~=(last_non_NaN_obs_regressors-first_non_NaN_obs_regressors+1)
%     error('Missing values in between')
% end
%
%
% %% Independent variable matrix
% % find NaN for a particular timepoint for all countries in a particular
% first_non_NaN_obs_dependent=find(~all(isnan(dependent_variable),2),1,'first');
% last_non_NaN_obs_dependent=find(~all(isnan(dependent_variable),2),1,'last');
% if length(find(~all(isnan(dependent_variable),2)))~=(last_non_NaN_obs_dependent-first_non_NaN_obs_dependent+1)
%     error('Missing values in between')
% end
%
% %% Smallest common set
% first_obs=max(first_non_NaN_obs_regressors,first_non_NaN_obs_dependent);
% last_obs=min(last_non_NaN_obs_regressors,last_non_NaN_obs_dependent);

% regressor_mat=regressor_mat(first_obs:last_obs,:,:);
% dependent_variable=dependent_variable(first_obs:last_obs,:);

n_countries=size(dependent_variable,2);

%% add fixed effects
if country_fixed_effects_dummy
    country_FE_indices=[];
    for ii=1:length(country_indices_without_only_NaN_obs)
        country_FE_indices=[country_FE_indices, pos.(['Country_FE_' num2str(country_indices_without_only_NaN_obs(ii))])];
    end
    data_array_for_regression_country_Fixed_Effects=data_array_for_regression_stacked_by_variable(time_indices_without_only_NaN_obs,country_indices_without_only_NaN_obs,country_FE_indices);
    if has_indicator
        % state-dependent country FE: one set interacted with the indicator,
        % one set interacted with its complement
        indicator_in_sample=data_array_for_regression_stacked_by_variable(time_indices_without_only_NaN_obs,country_indices_without_only_NaN_obs,indicator_position);
        data_array_for_regression_country_Fixed_Effects=cat(3,data_array_for_regression_country_Fixed_Effects.*indicator_in_sample,...
            data_array_for_regression_country_Fixed_Effects.*(1-indicator_in_sample));
    end
else
    data_array_for_regression_country_Fixed_Effects=[];
end
if time_fixed_effects_dummy
    if country_fixed_effects_dummy %drop one time FE to avoid dummy variable trap
        n_time_FE=length(time_indices_without_only_NaN_obs)-1;
    else
        n_time_FE=length(time_indices_without_only_NaN_obs);
    end
    time_FE_indices=[];
    for ii=1:n_time_FE
        time_FE_indices=[time_FE_indices, pos.(['Time_FE_' num2str(time_indices_without_only_NaN_obs(ii))])];
    end
    data_array_for_regression_time_Fixed_Effects=data_array_for_regression_stacked_by_variable(time_indices_without_only_NaN_obs,country_indices_without_only_NaN_obs,time_FE_indices);
else
    data_array_for_regression_time_Fixed_Effects=[];
end
if ~country_fixed_effects_dummy && ~time_fixed_effects_dummy %add constant to regression
    data_array_for_regression_time_Fixed_Effects=ones(size(regressor_mat,1),n_countries);
end

regressor_mat=cat(3,regressor_mat,data_array_for_regression_country_Fixed_Effects,data_array_for_regression_time_Fixed_Effects);
